Create a new analysis directories.
- general directory
- for plots
- for output of summary results
- for baseline tables
* General packages...
* Genomic packages...
In this notebook we explore the files for the bulk RNAseq analyses and select the subset of data we need.
Here we obtain data from the MR_CVD_MDD in plaques.
Genes.xlsx - list of genes of interest.Variants.xlsx - list of variant(s) of interest.library(openxlsx)
gene_list_df <- read.xlsx(paste0(TARGET_loc, "/targets.xlsx"), sheet = "Genes")
gene_list <- unlist(gene_list_df$Gene)
gene_list [1] "ADAP1" "BAHD1" "CHAF1A" "DISP2" "FAM98B" "GNA11" "GPR176" "KNL1" "MDFIC" "MPND" "NDUFA11" "PIP5K1C" "SUN1" "TLE5" "ZFAND2A"
First we will load the data:
Now you need to choose which of the datasets you will use for the downstream analyses.
estimateSizeFactors()vst()Note: we advise using
vst()transformed normalized data
# AERNAScomboSE <- readRDS(file = paste0(OUT_loc, "/",Today,".AERNAScomboSEvst.CEA.1093pts.SE.after_qc.IC_academic.RDS"))
# AERNAScomboSE <- readRDS(file = paste0(OUT_loc, "/20240109.AERNAScomboSEvst.CEA.1093pts.SE.after_qc.IC_academic.RDS"))
AERNAScomboSE <- readRDS(file = paste0(OUT_loc, "/",Today,".AERNAS1SEvst.CEA.624pts.SE.after_qc.IC_academic.RDS"))First, we extract the clinical data.
bulkRNA_meta_clin <- as.tibble(colData(AERNAScomboSE))
bulkRNA_meta_clin %<>%
# mutate(macrophages = factor(macrophages, levels = c("no staining", "minor staining", "moderate staining", "heavy staining"))) %>%
# mutate(smc = factor(smc, levels = c("no staining", "minor staining", "moderate staining", "heavy staining"))) %>%
# mutate(calcification = factor(calcification, levels = c("no staining", "minor staining", "moderate staining", "heavy staining"))) %>%
# mutate(collagen = factor(collagen, levels = c("no staining", "minor staining", "moderate staining", "heavy staining"))) %>%
# mutate(fat = factor(fat, levels = c("no fat", "< 40% fat", "> 40% fat"))) %>%
mutate(study_number_row = STUDY_NUMBER) %>%
as.data.frame() %>%
column_to_rownames("study_number_row")mutate: new variable 'study_number_row' (character) with 624 unique values and 0% NA
names(bulkRNA_meta_clin)[names(bulkRNA_meta_clin) == "STUDY_NUMBER"] <- "study_number"
head(bulkRNA_meta_clin)[1] 624 69
Second, we define the variables we need.
# Baseline table variables
basetable_vars = c("Hospital", "ORyear", "Artery_summary",
"Age", "Gender",
# "TC_finalCU", "LDL_finalCU", "HDL_finalCU", "TG_finalCU",
"TC_final", "LDL_final", "HDL_final", "TG_final",
# "hsCRP_plasma",
"systolic", "diastoli", "GFR_MDRD", "BMI",
"KDOQI", "BMI_WHO",
"SmokerStatus", "AlcoholUse",
"DiabetesStatus",
"Hypertension.selfreport", "Hypertension.selfreportdrug", "Hypertension.composite", "Hypertension.drugs",
"Med.anticoagulants", "Med.all.antiplatelet", "Med.Statin.LLD",
"Stroke_Dx", "sympt", "Symptoms.5G", "AsymptSympt", "AsymptSympt2G",
"Symptoms.Update2G", "Symptoms.Update3G",
"restenos", "stenose", "Artery_summary",
"CAD_history", "PAOD", "Peripheral.interv",
"EP_composite", "EP_composite_time", "epcom.3years",
"EP_major", "EP_major_time","epmajor.3years",
"MAC_rankNorm", "SMC_rankNorm", "Macrophages.bin", "SMC.bin",
"Neutrophils_rankNorm", "MastCells_rankNorm",
"IPH.bin", "VesselDensity_rankNorm",
"Calc.bin", "Collagen.bin",
"Fat.bin_10", "Fat.bin_40",
"OverallPlaquePhenotype", "Plaque_Vulnerability_Index")
basetable_bin = c("Gender", "Artery_summary",
"KDOQI", "BMI_WHO",
"SmokerStatus", "AlcoholUse",
"DiabetesStatus",
"Hypertension.selfreport", "Hypertension.selfreportdrug", "Hypertension.composite", "Hypertension.drugs",
"Med.anticoagulants", "Med.all.antiplatelet", "Med.Statin.LLD",
"Stroke_Dx", "sympt", "Symptoms.5G", "AsymptSympt", "AsymptSympt2G",
"Symptoms.Update2G", "Symptoms.Update3G",
"restenos", "stenose", "Artery_summary",
"CAD_history", "PAOD", "Peripheral.interv",
"EP_composite", "Macrophages.bin", "SMC.bin",
"IPH.bin",
"Calc.bin", "Collagen.bin",
"Fat.bin_10", "Fat.bin_40",
"OverallPlaquePhenotype", "Plaque_Vulnerability_Index")
# basetable_bin
basetable_con = basetable_vars[!basetable_vars %in% basetable_bin]
# basetable_conFrom here we can analyze whether specific genes differ between groups, or do this for the entire gene set as part of DE analysis, and then select our genes of interest.
From there, we extract the gene expression values, plus gene identifier, and select the genes of our interest.
expression_data <- assay(AERNAScomboSE)
# extract expression values from vsd, including ensembl names
expression_data <- as_tibble(data.frame(gene_ensembl = rowRanges(AERNAScomboSE)$feature_id, assay(AERNAScomboSE))) %>%
mutate_at(vars(c("gene_ensembl")), list(as.character)) ## gene_ensembl needs to be character for annotation to workmutate_at: no changes
# annotations
# gene symbol - via org.Hs.eg.db
# columns(org.Hs.eg.db)
expression_data$symbol <- mapIds(org.Hs.eg.db,
keys = expression_data$gene_ensembl,
column = "SYMBOL",
keytype = "ENSEMBL",
multiVals = "first")'select()' returned 1:many mapping between keys and columns
# tidy and subset
expression_data_sel <- expression_data %>%
dplyr::select(gene_ensembl, symbol, everything()) %>%
# filter(symbol == "APOE" | symbol == "TRIB3") %>% # filter APOE and TRIB3
dplyr::filter(symbol %in% gene_list)
head(expression_data_sel)
# tidy and subset non-selected genes
set.seed(141619)
expression_data_sample <- expression_data %>%
dplyr::select(gene_ensembl, symbol, everything()) %>%
sample_n(1000) %>%
unite(symbol_ensembl, symbol, gene_ensembl, sep = "_", remove = FALSE)sample_n: removed 20,835 rows (95%), 1,000 rows remaining
expression_data_sample_mean <- expression_data_sample %>%
select_if(is.numeric) %>%
colMeans() %>%
as_tibble(rownames = "study_number") %>%
dplyr::rename(expression_value_sample = value)select_if: dropped 3 variables (symbol_ensembl, gene_ensembl, symbol)
Furthermore, the expression_data_sel df was gathered into a long form
df for annotation with symptoms variables from the
vsd object, and later visualization and statistics.
# gather expression_data_sel df into long df form for annotation, plotting and statistics
expression_long <-
gather(expression_data_sel, key = "study_number", value = "expression_value", -c(gene_ensembl, symbol))gather: reorganized (ae1, ae1026, ae1029, ae1032, ae1035, …) into (study_number, expression_value) [was 15x626, now 9360x4]
# old school way
# Annotate with smoking variables
# sample_ids <- expression_long$study_number
# mm <- match(expression_long$study_number, rownames(colData(vsd)))
#
# ## Add traits to df
# ## Binary traits
# expression_long$sex <- colData(vsd)$sex[mm]
# expression_long$testosterone <- colData(vsd)$testosterone[mm]
# expression_long$t_e2_ratio <- colData(vsd)$t_e2_ratio[mm]
# new school way
plaque_phenotypes_cat <- c("Macrophages.bin",
"SMC.bin",
"Calc.bin",
"Collagen.bin",
"Fat.bin_10",
# "Fat.bin_40",
"IPH.bin")
plaque_phenotypes_num <- c("MAC_rankNorm", #"macmean0",
"SMC_rankNorm", #"smcmean0",
# "MastCells_rankNorm", #"mast_cells_plaque",
# "Neutrophils_rankNorm", #"neutrophils",
"VesselDensity_rankNorm")
expression_long <- expression_long %>%
left_join(bulkRNA_meta_clin %>% dplyr::select(study_number,
plaque_phenotypes_cat,
plaque_phenotypes_num,
epmajor.3years,
#epmajor.30days,
AsymptSympt2G,
Gender, Hospital, StudyName),
by = "study_number") %>%
mutate(epmajor_3years_yn = str_replace_all(epmajor.3years, c("Excluded" = "yes", "Included" = "no"))) #%>%Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
Please use `all_of()` or `any_of()` instead.
# Was:
data %>% select(plaque_phenotypes_cat)
# Now:
data %>% select(all_of(plaque_phenotypes_cat))
See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
Please use `all_of()` or `any_of()` instead.
# Was:
data %>% select(plaque_phenotypes_num)
# Now:
data %>% select(all_of(plaque_phenotypes_num))
See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.left_join: added 14 columns (Macrophages.bin, SMC.bin, Calc.bin, Collagen.bin, Fat.bin_10, …) > rows only in x 0 > rows only in bulkRNA_meta_clin %>% d.. ( 0) > matched rows 9,360 > ======= > rows total 9,360mutate: new variable 'epmajor_3years_yn' (character) with 3 unique values and 1% NA
# mutate(epmajor.30days_yn = str_replace_all(epmajor.30days, c("Excluded" = "yes", "Included" = "no")))
head(expression_long)In case some genes are not available in our data we could filter them here.
[1] "ADAP1" "BAHD1" "CHAF1A" "DISP2" "FAM98B" "GNA11" "GPR176" "KNL1" "MDFIC" "MPND" "NDUFA11" "PIP5K1C" "SUN1" "TLE5" "ZFAND2A"
This code is just an example to filter the list from genes that are not in the data.
Figure 1: Expression of genes of interest: boxplots
# Make directory for plots
ifelse(!dir.exists(file.path(QC_loc, "/Boxplots")),
dir.create(file.path(QC_loc, "/Boxplots")),
FALSE)[1] FALSE
BOX_loc = paste0(QC_loc,"/Boxplots")
for(GENE in gene_list_qc){
cat(paste0("Plotting expression for ", GENE,".\n"))
temp <- subset(expression_long, symbol == GENE)
compare_means(expression_value ~ Gender, data = temp)
p1 <- ggpubr::ggboxplot(temp,
x = "Gender",
y = "expression_value",
color = "Gender",
palette = "npg",
add = "jitter",
ylab = paste0("normalized expression ", GENE,"" ),
repel = TRUE
) + stat_compare_means()
print(p1)
cat(paste0("Saving image for ", GENE,".\n"))
ggsave(filename = paste0(BOX_loc, "/", Today, ".",GENE,".expression_vs_gender.png"), plot = last_plot())
ggsave(filename = paste0(BOX_loc, "/", Today, ".",GENE,".expression_vs_gender.pdf"), plot = last_plot())
ggsave(filename = paste0(BOX_loc, "/", Today, ".",GENE,".expression_vs_gender.ps"), plot = last_plot())
rm(temp, p1)
}Plotting expression for ADAP1.
Saving image for ADAP1.
Plotting expression for BAHD1.
Saving image for BAHD1.
Plotting expression for CHAF1A.
Saving image for CHAF1A.
Plotting expression for DISP2.
Saving image for DISP2.
Plotting expression for FAM98B.
Saving image for FAM98B.
Plotting expression for GNA11.
Saving image for GNA11.
Plotting expression for GPR176.
Saving image for GPR176.
Plotting expression for KNL1.
Saving image for KNL1.
Plotting expression for MDFIC.
Saving image for MDFIC.
Plotting expression for MPND.
Saving image for MPND.
Plotting expression for NDUFA11.
Saving image for NDUFA11.
Plotting expression for PIP5K1C.
Saving image for PIP5K1C.
Plotting expression for SUN1.
Saving image for SUN1.
Plotting expression for TLE5.
Saving image for TLE5.
Plotting expression for ZFAND2A.
Saving image for ZFAND2A.
Figure 2A: Expression of genes of interest: histograms
# Make directory for plots
ifelse(!dir.exists(file.path(QC_loc, "/Histograms")),
dir.create(file.path(QC_loc, "/Histograms")),
FALSE)[1] TRUE
HISTOGRAM_loc = paste0(QC_loc,"/Histograms")
for(GENE in gene_list_qc){
# cat(paste0("Plotting expression for ", GENE,".\n"))
temp <- subset(expression_long, symbol == GENE)
p1 <- ggpubr::gghistogram(temp,
x = "expression_value",
y = "..count..",
color = "Gender", fill = "Gender",
palette = "npg",
add = "median",
ylab = paste0("normalized expression ", GENE,"" )
)
print(p1)
cat(paste0("Saving image for ", GENE,".\n"))
ggsave(filename = paste0(HISTOGRAM_loc, "/", Today, ".",GENE,".distribution.png"), plot = last_plot())
ggsave(filename = paste0(HISTOGRAM_loc, "/", Today, ".",GENE,".distribution.pdf"), plot = last_plot())
ggsave(filename = paste0(HISTOGRAM_loc, "/", Today, ".",GENE,".distribution.ps"), plot = last_plot())
rm(temp, p1 )
}Saving image for ADAP1.
Saving image for BAHD1.
Saving image for CHAF1A.
Saving image for DISP2.
Saving image for FAM98B.
Saving image for GNA11.
Saving image for GPR176.
Saving image for KNL1.
Saving image for MDFIC.
Saving image for MPND.
Saving image for NDUFA11.
Saving image for PIP5K1C.
Saving image for SUN1.
Saving image for TLE5.
Saving image for ZFAND2A.
Figure 2B: Expression of genes of interest: density plots
# Make directory for plots
ifelse(!dir.exists(file.path(QC_loc, "/Density")),
dir.create(file.path(QC_loc, "/Density")),
FALSE)[1] TRUE
DENSITY_loc = paste0(QC_loc,"/Density")
for(GENE in gene_list_qc){
# cat(paste0("Plotting expression for ", GENE,".\n"))
temp <- subset(expression_long, symbol == GENE)
p1 <- ggpubr::gghistogram(temp,
x = "expression_value",
y = "..density..",
color = "Gender", fill = "Gender",
palette = "npg",
add = "median",
ylab = paste0("normalized expression ", GENE,"" )
)
print(p1)
cat(paste0("Saving image for ", GENE,".\n"))
ggsave(filename = paste0(DENSITY_loc, "/", Today, ".",GENE,".density.png"), plot = last_plot())
ggsave(filename = paste0(DENSITY_loc, "/", Today, ".",GENE,".density.pdf"), plot = last_plot())
ggsave(filename = paste0(DENSITY_loc, "/", Today, ".",GENE,".density.ps"), plot = last_plot())
rm(temp, p1 )
}Saving image for ADAP1.
Saving image for BAHD1.
Saving image for CHAF1A.
Saving image for DISP2.
Saving image for FAM98B.
Saving image for GNA11.
Saving image for GPR176.
Saving image for KNL1.
Saving image for MDFIC.
Saving image for MPND.
Saving image for NDUFA11.
Saving image for PIP5K1C.
Saving image for SUN1.
Saving image for TLE5.
Saving image for ZFAND2A.
Figure 3: comparing expression of genes of interest to mean expression of a sample of 1,000 random genes
expression_wide <- expression_long %>%
tidyr::unite("symbol_ensembl", symbol:gene_ensembl, remove = TRUE, sep = "_") %>%
# dplyr::select(-gene_ensembl, -symbol) %>%
tidyr::spread(key = symbol_ensembl, value = expression_value) # the next 3 lines of code gave an error when selecting for genes_interest, since one of the genes of interest is missing: FGF3 is not in the data set. So, we need to select for the other 15 genes.
temp <- expression_long %>%
tidyr::unite("symbol_ensembl", symbol:gene_ensembl, remove = TRUE, sep = "_") %>%
dplyr::select(symbol_ensembl)
temp <- temp %>%
tidyr::separate(symbol_ensembl, c("symbol", "gene_ensembl"), remove = FALSE) %>%
dplyr::distinct(., symbol_ensembl, .keep_all = TRUE)
symbol_ensembl_gene_list_qc <- temp[temp$symbol %in% gene_list_qc,]$symbol_ensemblexpression_wide2 <- expression_wide %>%
left_join(expression_data_sample_mean, by = "study_number") %>%
dplyr::select(study_number, StudyName, symbol_ensembl_gene_list_qc, expression_value_sample)left_join: added one column (expression_value_sample) > rows only in x 0 > rows only in expression_data_sample_.. ( 0) > matched rows 624 > ===== > rows total 624Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
Please use `all_of()` or `any_of()` instead.
# Was:
data %>% select(symbol_ensembl_gene_list_qc)
# Now:
data %>% select(all_of(symbol_ensembl_gene_list_qc))
See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
expression_long2 <- expression_wide2 %>%
gather(gene, expression_value, -study_number, -StudyName) %>%
mutate(gene = str_replace_all(gene, c("expression_value_sample" = "Random genes"))) #%>%gather: reorganized (MPND_ENSG00000008382, GNA11_ENSG00000088256, TLE5_ENSG00000104964, ADAP1_ENSG00000105963, MDFIC_ENSG00000135272, …) into (gene, expression_value) [was 624x18, now 9984x4]mutate: changed 624 values (6%) of 'gene' (0 new NAs)
mean_1000_genes <- mean(expression_data_sample_mean$expression_value_sample)
# head(expression_long2)
#
p1 <- ggpubr::ggboxplot(expression_long2,
x = "gene",
y = "expression_value",
color = uithof_color[16],
add = "jitter",
add.params = list(size = 3, jitter = 0.3),
ylab = paste0("normalized expression ")
) +
geom_hline(yintercept = mean_1000_genes, linetype = "dashed", color = uithof_color[26], size = 0.4) +
theme(axis.text.x = element_text(size = 10, angle = 45, hjust = 1, vjust = 1), # change orientation of x-axis labels
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14)) Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
Please use `linewidth` instead.
p1
ggsave(filename = paste0(PLOT_loc, "/", Today, ".TargetExpression_vs_1000genes.png"), plot = last_plot())Saving 7.29 x 4.51 in image
ggsave(filename = paste0(PLOT_loc, "/", Today, ".TargetExpression_vs_1000genes.pdf"), plot = last_plot())Saving 7.29 x 4.51 in image
ggsave(filename = paste0(PLOT_loc, "/", Today, ".TargetExpression_vs_1000genes.ps"), plot = last_plot())Saving 7.29 x 4.51 in image
If we would put these correlations in one simple and comprehensible figure, we could use a correlation heatmap. Again, correlation coefficients used here are Spearman’s.
Figure 4: correlation heatmap between expression of genes of interest
library(tidyverse)
library(magrittr)
temp <- expression_wide %>%
column_to_rownames("study_number") %>%
dplyr::select(symbol_ensembl_gene_list_qc) %>%
drop_na() %>% # drop NA
Filter(function(x) sd(x) != 0, .) # filter variables with sd = 0
temp.cor <- cor(temp, method = "spearman")
p1 <- pheatmap(data.matrix(temp.cor),
scale = "none",
cluster_rows = TRUE,
cluster_cols = TRUE,
legend = TRUE,
fontsize = 10)
p1We are saving the final list of genes of interest
We will create a list of samples that should be included based on
CEA, and having the proper informed consent (‘academic’). We will save
the SummarizedExperiment as a RDS file for easy loading.
The clinical data will also be saved as a separate
txt-file.
To this end you need to lookup the EnsemblID (see above) for your target(s).
# targets = c("ENSG00000166033", # target
# "ENSG00000169174") # positive control PCSK9
# targets = c("ENSG00000169174") # target/positive control PCSK9
targets = unlist(temp$gene_ensembl)
rm(temp)We make a SummarizedExperiment for the subset of RNAseq
data. We make sure to only include the samples we need based on informed
consent and we include only the requested variables.
Next, we are constructing the SummarizedExperiment.
AERNAS_meta_data = c("SampleType", "RNAseqTech", "RNAseqType", "RNAseqQC",
"StudyType", "StudyName", "StudyBiobank", "SampleSize")
cat("\n* making a SummarizedExperiment ...\n")
* making a SummarizedExperiment ...
# this is all the data passing RNAseq quality control and UMI-corrected
# - after filtering on informed consent and artery type, the end sample size should be 1093
# - after filtering on 'no commercial business' based on informed consent, there are fewer samples: yyyy
cat(" > getting counts\n") > getting counts
[1] 15 624
> meta data and clinical data
# bulkRNA_meta_clin_COMMERCIAL <- subset(bulkRNA_meta_clin, select = c("study_number", basetable_vars))
bulkRNA_meta_clin_ACADEMIC <- subset(bulkRNA_meta_clin, select = c("study_number",
AERNAS_meta_data, # meta data of RNAseq experiment
basetable_vars)) # selection of variables
dim(bulkRNA_meta_clin_ACADEMIC)[1] 624 67
> rowRanges
# bulkRNA_meta_clin_COMMERCIAL <- subset(bulkRNA_meta_clin, select = c("study_number", basetable_vars))
bulkRNA_rowRanges <- rowRanges(AERNAScomboSE[targets])
bulkRNA_rowRangesGRanges object with 15 ranges and 2 metadata columns:
seqnames ranges strand | feature_id symbol
<Rle> <IRanges> <Rle> | <character> <character>
ENSG00000008382 19 4343527-4360086 + | ENSG00000008382 MPND
ENSG00000088256 19 3094362-3123999 + | ENSG00000088256 GNA11
ENSG00000104964 19 3052910-3063107 - | ENSG00000104964 TLE5
ENSG00000105963 7 897900-955407 - | ENSG00000105963 ADAP1
ENSG00000135272 7 114922094-115019917 + | ENSG00000135272 MDFIC
... ... ... ... . ... ...
ENSG00000167670 19 4402640-4445018 + | ENSG00000167670 CHAF1A
ENSG00000171262 15 38454127-38487710 + | ENSG00000171262 FAM98B
ENSG00000174886 19 5891276-5904006 - | ENSG00000174886 NDUFA11
ENSG00000178381 7 1152071-1160759 - | ENSG00000178381 ZFAND2A
ENSG00000186111 19 3630183-3700468 - | ENSG00000186111 PIP5K1C
-------
seqinfo: 331 sequences from an unspecified genome; no seqlengths
> construction of the SE
(AERNAScomboSE_target <- SummarizedExperiment(assays = list(counts = as.matrix(bulkRNA_countsFilt)),
colData = bulkRNA_meta_clin_ACADEMIC,
rowRanges = bulkRNA_rowRanges,
metadata = "Athero-Express Biobank Study bulk RNA sequencing. Sample type: carotid plaques. Technology: CEL2-seq adapted for bulk RNA sequencing, thus 3'-focused. UMI-corrected. VST and size factor normalized. Target subset."))class: RangedSummarizedExperiment
dim: 15 624
metadata(1): ''
assays(1): counts
rownames(15): ENSG00000008382 ENSG00000088256 ... ENSG00000178381 ENSG00000186111
rowData names(2): feature_id symbol
colnames(624): ae1 ae1026 ... ae998 ae999
colData names(67): study_number SampleType ... OverallPlaquePhenotype Plaque_Vulnerability_Index
* removing intermediate files ...
Sample list and clinical data.
temp <- as.tibble(subset(colData(AERNAScomboSE_target), select = c("study_number", AERNAS_meta_data)))
fwrite(temp,
file = paste0(OUT_loc, "/", Today, ".AERNAS1SE_target.CEA.624pts.samplelist.after_qc.IC_academic.csv"),
sep = ",", row.names = FALSE, col.names = TRUE,
showProgress = TRUE)
rm(temp)
temp <- as.tibble(subset(colData(AERNAScomboSE_target), select = c("study_number", basetable_vars)))
fwrite(temp,
file = paste0(OUT_loc, "/", Today, ".AERNAS1SE_target.CEA.624pts.clinicaldata.after_qc.IC_academic.csv"),
sep = ",", row.names = FALSE, col.names = TRUE,
showProgress = TRUE)
rm(temp)Assay data and gene information.
temp <- as_tibble(assay(AERNAScomboSE_target))
fwrite(temp,
file = paste0(OUT_loc, "/", Today, ".AERNAS1SE_target.CEA.624pts.assay.after_qc.IC_academic.csv"),
sep = ",", row.names = FALSE, col.names = TRUE,
showProgress = TRUE)
rm(temp)
temp <- as_tibble(rowRanges(AERNAScomboSE_target))
fwrite(temp,
file = paste0(OUT_loc, "/", Today, ".AERNAS1SE_target.CEA.624pts.genedata.after_qc.IC_academic.csv"),
sep = ",", row.names = FALSE, col.names = TRUE,
showProgress = TRUE)
rm(temp)The subset as SummarizedExperiment() dataset in
RDS-format.
Version: v1.0.1
Last update: 2025-03-28
Written by: Sander W. van der Laan (s.w.vanderlaan-2[at]umcutrecht.nl).
Description: Script to load bulk RNA sequencing data, and perform gene expression analyses, and visualisations.
Minimum requirements: R version 3.5.2 (2018-12-20) -- 'Eggshell Igloo', macOS Mojave (10.14.2).
**MoSCoW To-Do List**
The things we Must, Should, Could, and Would have given the time we have.
_M_
_S_
_C_
_W_
**Changes log**
* v1.0.1 Updated the script to include the latest data and to explore the subset of data.
* v1.0.0 Inital version. Re-arranged to explore the subset of data.
R version 4.4.3 (2025-02-28)
Platform: x86_64-apple-darwin20
Running under: macOS Sequoia 15.4
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
time zone: Europe/Amsterdam
tzcode source: internal
attached base packages:
[1] stats4 grid tools stats graphics grDevices utils datasets methods base
other attached packages:
[1] magrittr_2.0.3 annotables_0.2.0 EnhancedVolcano_1.22.0 ggrepel_0.9.6 EnsDb.Hsapiens.v86_2.99.0
[6] ensembldb_2.28.1 AnnotationFilter_1.28.0 TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2 mygene_1.40.0 txdbmaker_1.0.1
[11] org.Hs.eg.db_3.19.1 DESeq2_1.44.0 SummarizedExperiment_1.34.0 MatrixGenerics_1.16.0 matrixStats_1.4.1
[16] GenomicFeatures_1.56.0 AnnotationDbi_1.66.0 Biobase_2.64.0 GenomicRanges_1.56.2 GenomeInfoDb_1.40.1
[21] IRanges_2.38.1 S4Vectors_0.42.1 BiocGenerics_0.50.0 Hmisc_5.1-3 survminer_0.4.9
[26] survival_3.8-3 GGally_2.2.1 PerformanceAnalytics_2.0.4 xts_0.14.0 zoo_1.8-12
[31] ggcorrplot_0.1.4.999 corrr_0.4.4 reshape2_1.4.4 bacon_1.32.0 ellipse_0.5.0
[36] BiocParallel_1.38.0 meta_7.0-0 metadat_1.2-0 qqman_0.1.9 tidylog_1.1.0
[41] gridExtra_2.3 plyr_1.8.9 rmarkdown_2.28 officer_0.6.7 flextable_0.9.7
[46] writexl_1.5.1 patchwork_1.3.0.9000 labelled_2.13.0 sjPlot_2.8.16 UpSetR_1.4.0
[51] ggpubr_0.6.0.999 forestplot_3.1.5 abind_1.4-8 checkmate_2.3.2 pheatmap_1.0.12
[56] devtools_2.4.5 usethis_3.0.0 BlandAltmanLeh_0.3.1 tableone_0.13.2 openxlsx_4.2.7.1
[61] haven_2.5.4 eeptools_1.2.5 DT_0.33 knitr_1.48 lubridate_1.9.3
[66] forcats_1.0.0 stringr_1.5.1 purrr_1.0.2 tibble_3.2.1 ggplot2_3.5.1
[71] tidyverse_2.0.0 data.table_1.16.2 naniar_1.1.0 tidyr_1.3.1 dplyr_1.1.4
[76] optparse_1.7.5 readr_2.1.5 pander_0.6.5 R.utils_2.12.3 R.oo_1.26.0
[81] R.methodsS3_1.8.2 worcs_0.1.15 credentials_2.0.2
loaded via a namespace (and not attached):
[1] progress_1.2.3 sjmisc_2.8.10 urlchecker_1.0.1 nnet_7.3-20 Biostrings_2.72.1 katex_1.5.0 vctrs_0.6.5 rticles_0.27
[9] digest_0.6.37 png_0.1-8 proxy_0.4-27 renv_1.0.11 MASS_7.3-64 fontLiberation_0.1.0 httpuv_1.6.15 withr_3.0.2
[17] xfun_0.48 ellipsis_0.3.2 memoise_2.0.1 ggsci_3.2.0 profvis_0.4.0 calibrate_1.7.7 systemfonts_1.1.0 ragg_1.3.3
[25] V8_6.0.0 prettyunits_1.2.0 Formula_1.2-5 sys_3.4.3 datawizard_0.13.0 KEGGREST_1.44.1 promises_1.3.0 httr_1.4.7
[33] rstatix_0.7.2 restfulr_0.0.15 rstudioapi_0.16.0 UCSC.utils_1.0.0 miniUI_0.1.1.1 generics_0.1.3 base64enc_0.1-3 curl_5.2.3
[41] mitools_2.4 zlibbioc_1.50.0 ggeffects_1.7.2 GenomeInfoDbData_1.2.12 quadprog_1.5-8 SparseArray_1.4.8 xtable_1.8-4 evaluate_1.0.1
[49] S4Arrays_1.4.1 BiocFileCache_2.12.0 hms_1.1.3 filelock_1.0.3 colorspace_2.1-1 getopt_1.20.4 lmtest_0.9-40 later_1.3.2
[57] lattice_0.22-6 XML_3.99-0.17 survey_4.4-2 class_7.3-23 pillar_1.10.1 nlme_3.1-167 performance_0.12.3 compiler_4.4.3
[65] stringi_1.8.4 minqa_1.2.8 GenomicAlignments_1.40.0 crayon_1.5.3 BiocIO_1.14.0 chron_2.3-61 locfit_1.5-9.10 bit_4.5.0
[73] mathjaxr_1.6-0 codetools_0.2-20 textshaping_0.4.0 clisymbols_1.2.0 openssl_2.2.2 crosstalk_1.2.1 bslib_0.8.0 e1071_1.7-16
[81] mime_0.12 splines_4.4.3 Rcpp_1.0.13 dbplyr_2.5.0 blob_1.2.4 lme4_1.1-35.5 fs_1.6.4 prereg_0.6.0
[89] pkgbuild_1.4.4 gh_1.4.1 ggsignif_0.6.4 sqldf_0.4-11 Matrix_1.7-2 tzdb_0.4.0 visdat_0.6.0 pkgconfig_2.0.3
[97] equatags_0.2.1 cachem_1.1.0 RSQLite_2.3.7 DBI_1.2.3 numDeriv_2016.8-1.1 fastmap_1.2.0 scales_1.3.0 Rsamtools_2.20.0
[105] broom_1.0.7 sass_0.4.9 coda_0.19-4.1 ggstats_0.7.0 insight_0.20.5 carData_3.0-5 rpart_4.1.24 farver_2.1.2
[113] gsubfn_0.7 yaml_2.3.10 foreign_0.8-88 sjstats_0.19.0 rtracklayer_1.64.0 cli_3.6.4 lifecycle_1.0.4 rsconnect_1.3.1
[121] askpass_1.2.1 sessioninfo_1.2.2 backports_1.5.0 timechange_0.3.0 gtable_0.3.6 rjson_0.2.23 metafor_4.6-0 parallel_4.4.3
[129] jsonlite_1.8.9 bitops_1.0-9 bit64_4.5.2 proto_1.0.0 zip_2.3.1 ranger_0.16.0 jquerylib_0.1.4 survMisc_0.5.6
[137] lazyeval_0.2.2 shiny_1.9.1 xslt_1.4.6 htmltools_0.5.8.1 KMsurv_0.1-5 rappdirs_0.3.3 sjlabelled_1.2.0 tinytex_0.53
[145] glue_1.8.0 httr2_1.0.5 XVector_0.44.0 gdtools_0.4.0 RCurl_1.98-1.16 boot_1.3-31 R6_2.6.1 arm_1.14-4
[153] km.ci_0.5-6 vcd_1.4-13 CompQuadForm_1.4.3 labeling_0.4.3 cluster_2.1.8 pkgload_1.4.0 nloptr_2.1.1 ProtGenerics_1.36.0
[161] DelayedArray_0.30.1 tidyselect_1.2.1 htmlTable_2.4.3 xml2_1.3.6 fontBitstreamVera_0.1.1 car_3.1-3 munsell_0.5.1 fontquiver_0.2.1
[169] htmlwidgets_1.6.4 RColorBrewer_1.1-3 biomaRt_2.60.1 rlang_1.1.5 gert_2.1.4 uuid_1.2-1 remotes_2.5.0
| © 1979-2025 Sander W. van der Laan | s.w.vanderlaan[at]gmail[dot]com | vanderlaanand.science. |